Statistical analysis using causal inference
Agalychnis lemur
Purpose
- Determine the adjustment sets that allow to infer a causal effect of environmental variables on vocal activity
- Evaluate effect of environmental factors on vocal activity of A. lemur
Analysis flowchart
Load packages
Code
# Unload packages
out <- sapply(paste('package:', names(sessionInfo()$otherPkgs), sep = ""), function(x) try(detach(x, unload = FALSE, character.only = TRUE), silent = T))
pkgs <- c("devtools", "maRce10/warbleR", "bioacoustics", "pbapply", "Rraven",
"parallel", "viridis", "ggplot2", "knitr", "kableExtra",
"maRce10/ohun", "DT", "formatR", "ranger", "cowplot")
# install/ load packages
sketchy::load_packages(packages = pkgs)
options("digits" = 6, "digits.secs" = 5, knitr.table.format = "html")
# set evaluation false
opts_chunk$set(fig.width = 10, fig.height = 6, warning = FALSE, message = FALSE, tidy = TRUE)
# set working directory as project directory or one directory above,
opts_knit$set(root.dir = "..")Custom functions
Code
# ggplot global setting
theme_set(theme_classic(base_size = 20))1 Downsample
Code
iw <- info_wavs()
table(iw$sample.rate)
# fix_wavs(samp.rate = 10)2 Create stratified 5 minute cuts
Code
# path with waves wav_path <- '/Volumes/CHIRINO/Grabaciones SM4 2019/'
wav_path <- paste0("/media/m/Expansion/Projects/Ongoing/Agalychnis_lemur", "/sound_files/10_kHz_5_min_cuts/converted_sound_files/")
# list of wave files
wavs <- list.files(path = wav_path, pattern = "\\.wav$", ignore.case = TRUE, recursive = TRUE,
full.names = TRUE)
# data frame with waves and metadata
wav_df <- data.frame(path = dirname(wavs), file = basename(wavs))
wav_df$site <- ifelse(grepl("sukia", wav_df$file, ignore.case = TRUE), "Sukia", "Chimu")
sm_metadata <- seewave::songmeter(wav_df$file)
wav_df <- cbind(wav_df, sm_metadata)
head(wav_df)
str(wav_df)
par(mfrow = c(2, 1))
hist(wav_df$time[wav_df$site == "Sukia"], breaks = 100, col = adjustcolor("green",
alpha.f = 0.4), main = "Sukia")
hist(wav_df$time[wav_df$site == "Chimu"], breaks = 100, col = adjustcolor("blue",
alpha.f = 0.4), main = "Chimu")
wav_df <- wav_df[order(wav_df$time), ]
wav_df$day_count <- as.vector(round((wav_df$time - min(wav_df$time))/(3600 * 24),
0) + 1)
wav_df$week <- cut(x = wav_df$day_count, breaks = seq(1, max(wav_df$day_count) +
7, by = 7), include.lowest = TRUE)
wav_df <- droplevels(wav_df)
wav_df$period <- 4
wav_df$period[wav_df$hour %in% 17:19] <- 1
wav_df$period[wav_df$hour %in% 20:22] <- 2
wav_df$period[wav_df$hour %in% c(23, 0, 1)] <- 3
# create name combination
wav_df$site.week.period <- paste(wav_df$site, wav_df$week, wav_df$period, sep = "-")
# remove LAguna chimu
wav_df <- wav_df[grep("LAguna Chimu", wav_df$path, invert = TRUE), ]
set.seed(1)
sample_wav_l <- lapply(unique(wav_df$site.week.period), function(x) {
X <- wav_df[wav_df$site.week.period == x, ]
if (nrow(X) > 2)
X <- X[sample(1:nrow(X), size = 2), ]
return(X)
})
sample_wav_df <- do.call(rbind, sample_wav_l)
write.csv(sample_wav_df, file.path("/Volumes/CHIRINO/Proyecto Lemur/INV_2021/5_min_cut_lemur/",
"5_min_cut_metadata.csv"), row.names = FALSE)
sample_wav_df$cut.name <- gsub(".wav", "_5min_cut.wav", sample_wav_df$file)
library(pbapply)
out <- pblapply(1:nrow(sample_wav_df), function(x) {
# read first 5 mins
wv <- try(readWave(file.path(sample_wav_df$path[x], sample_wav_df$file[x]), from = 0,
to = 300, units = "seconds"), silent = TRUE)
if (class(wv) == "Wave")
# save cut
writeWave(wv, filename = file.path("/Volumes/CHIRINO/Proyecto Lemur/INV_2021/5_min_cut_lemur/",
sample_wav_df$cut.name[x]))
if (class(wv) == "Wave")
return(NULL) else return(x)
})2.1 Cheking manual selections
Code
# list all selection tables
sl_tbs <- list.files(paste0("~/Dropbox/estudiantes/Fabiola/proyecto_lemur/data",
"/processed/selection_tables/all_selec_tables_lemur/"))
# remove empty selection table
sl_tbs <- sl_tbs[sl_tbs != "LAGCHIMU_20200919_201500.Table.2.selections.txt"]
# import all selections
sls <- imp_raven(path = paste0("~/Dropbox/estudiantes/Fabiola/proyecto_lemur/data",
"/processed/selection_tables/all_selec_tables_lemur/"), warbler.format = TRUE,
all.data = TRUE, files = sl_tbs)
# relabel selec column
sls$selec <- 1:nrow(sls)
# check duration
sls$duration <- sls$end - sls$start
unique(sls$sound.files[sls$duration > 1])
# remove temporarily but must be fixed on the original individual selection
# tables
sls <- sls[sls$duration < 1, ]
# number of sound files and selection files should be the same
length(unique(sls$sound.files)) == length(unique(sls$selec.file))
# find files with more than 1 selection tab <-
# table(sls$sound.files[!duplicated(sls$selec.file)]) tab[tab > 1]
# unique(sls$selec.file[sls$sound.files ==
# 'LAGCHIMU_20190709_200000_5min_cut.wav'])
# check selections
cs <- check_sels(X = sls, pb = FALSE)
table(cs$check.res[cs$check.res != "OK"])
# those with errors were empty selections (single points)
sls <- sls[cs$check.res == "OK", ]
# check for duplicates (if any sound files was anottated more than once)
sls <- overlapping_sels(sls, indx.row = TRUE, pb = FALSE)
sls <- sls[!duplicated(sls$indx.row, incomparables = NA), ]
# should be 4 in 'SUKIA_20200207_220000_5min_cut.wav'
table(sls$sound.files[!is.na(sls$indx.row)])
# export raven multiple sound file selection table
exp_raven(sls, path = "./data/processed/selection_tables/", sound.file.path = .Options$warbleR$path,
file.name = "temporary_doublechecking_all_selections")
# fix path for Fabis laptop
fix_path(path = "./data/processed/selection_tables/", new.begin.path = paste0("/Users/fabiolachirino/Dropbox/Fabiola/proyecto_lemur",
"/data/raw/sound_files/10_kHz_5_min_cuts/"), sound.file.col = "Begin File")
# create sound file labels
sf.abbr <- gsub("0000_5min_cut.wav", "", sls$sound.files)
sf.abbr <- gsub("SUKIA_20", "S", sf.abbr)
sls$sf.abbr <- gsub("LAGCHIMU_20", "L", sf.abbr)
sls_est <- selection_table(sls, path = "/media/m/CHIRINO/Proyecto Lemur/INV_2021/5_min_cut_data/converted_sound_files",
extended = TRUE)
saveRDS(sls_est, "./data/processed/extended_selection_table_manual_annotations_lemur.RDS")Code
traing_dat <- imp_raven(path = "./data/processed/selection_tables/", files = "temporary_doublechecking_all_selections.txt")
train_files <- unique(traing_dat$`Begin File`)
fc <- file.copy(from = file.path("/media/m/Expansion/Projects/Ongoing/Agalychnis_lemur",
"sound_files/10_kHz_5_min_cuts/", train_files), to = file.path("/media/m/Expansion/Projects/Ongoing/Agalychnis_lemur",
"/sound_files/training_sound_files/", train_files))
sls$tags <- "lemur"
exp_raven(sls[, c("sound.files", "selec", "start", "end", "bottom.freq", "top.freq",
"tags")], path = paste0("/media/m/Expansion/Projects/Ongoing/Agalychnis_lemur",
"/sound_files/training_sound_files/"), sound.file.path = .Options$warbleR$path,
file.name = "annotations_raven_format")
fix_path(path = paste0("/media/m/Expansion/Projects/Ongoing/Agalychnis_lemur", "/sound_files/training_sound_files/"),
new.begin.path = "", sound.file.col = "Begin File")
fix_path(path = paste0("/media/m/Expansion/Projects/Ongoing/Agalychnis_lemur", "/sound_files/training_sound_files/"),
new.begin.path = "", sound.file.col = "Begin Path")2.2 Create catalogs
Code
sls_est <- readRDS("./data/processed/extended_selection_table_manual_annotations_lemur.RDS")
# create catalogs
catalog(X = sls_est, flim = c(1, 3.5), nrow = 12, ncol = 15, ovlp = 70, height = 15,
width = 23, same.time.scale = TRUE, mar = 0.005, wl = 512, gr = FALSE, spec.mar = 0.4,
lab.mar = 0.8, rm.axes = TRUE, by.row = TRUE, box = TRUE, labels = c("sf.abbr",
"selec"), fast.spec = TRUE, pal = viridis, parallel = 10)
# move images to figures folder
move_imgs(from = .Options$warbleR$path, to = "./figures", overwrite = TRUE)
# catalog for paper
catalog(X = sls_est[1:180, ], flim = c(1, 3.5), nrow = 12, ncol = 15, ovlp = 90,
height = 15, width = 23, same.time.scale = TRUE, mar = 0.1, wl = 512, gr = FALSE,
spec.mar = 0, lab.mar = 1e-04, rm.axes = TRUE, by.row = TRUE, box = FALSE, fast.spec = FALSE,
pal = viridis, parallel = 1, img.suffix = "no_margins", collevels = seq(-115,
0, 5))
# move images to figures folder
move_imgs(from = .Options$warbleR$path, to = "./figures", overwrite = TRUE)2.3 Measure frequency range parameters
To figure out variation in signal structure aiming to define templates
Code
# measure spectrographic parameters
spectral_parameters <- spectro_analysis(sls, bp = "frange", fast = TRUE, ovlp = 70,
pb = FALSE)
saveRDS(spectral_parameters, "./data/processed/acoustic_parameters_and_selections.RDS")Code
spectral_parameters <- readRDS("./data/processed/acoustic_parameters_and_selections.RDS")
sls <- imp_raven(path = "./data/processed/selection_tables/", files = "temporary_doublechecking_all_selections.txt",
pb = FALSE, warbler.format = TRUE)
# measure signal_2_noise ratio sls <- signal_2_noise(sls, mar = 0.1, before =
# TRUE, pb = FALSE, bp = 'frange')
pca <- prcomp(spectral_parameters[, 2:27], scale. = TRUE)
# extrac SNR and peak freq
sls$meanpeakf <- spectral_parameters$meanpeakf
sls$duration <- spectral_parameters$duration
sls$sp.ent <- spectral_parameters$sp.ent
sls$PC1 <- pca$x[, 1]
# stck_parameters$PC1
stck_parameters <- stack(sls[, c("duration", "meanpeakf", "sp.ent", "PC1")])
ggplot(stck_parameters, aes(x = values)) + geom_histogram(fill = viridis(10, alpha = 0.9)[2]) +
facet_wrap(~ind, scales = "free")Code
spectral_parameters <- spectral_parameters[!is.infinite(spectral_parameters$SNR),
]
freq_variation <- sapply(c("duration", "meanpeakf", "sp.ent", "PC1"), function(x) data.frame(mean = mean(sls[,
x]), min = min(sls[, x]), max = max(sls[, x]), sd = sd(sls[, x]), CI.2.5 = quantile(sls[,
x], 0.025), CI.97.5 = quantile(sls[, x], 0.975)))
freq_variation <- do.call(rbind, lapply(data.frame(freq_variation), function(x) round(unlist(x),
3)))
# print table as kable
kb <- kable(freq_variation, row.names = TRUE, digits = 3)
kb <- kable_styling(kb, bootstrap_options = c("striped", "hover", "condensed", "responsive"))
print(kb)| mean | min | max | sd | CI.2.5 | CI.97.5 | |
|---|---|---|---|---|---|---|
| duration | 0.067 | 0.012 | 0.972 | 0.037 | 0.029 | 0.139 |
| meanpeakf | 2.337 | 1.199 | 2.956 | 0.109 | 2.145 | 2.517 |
| sp.ent | 0.873 | 0.623 | 0.993 | 0.055 | 0.756 | 0.960 |
| PC1 | 0.000 | -21.798 | 11.555 | 3.066 | -5.660 | 4.920 |
2.4 Template-based detection
Code
# find template close to mean peak freq
mean_peak_indx <- which.min(abs(sls$meanpeakf - freq_variation[rownames(freq_variation) ==
"meanpeakf", 1]))
# find template close to mean duration
mean_duration_indx <- which.min(abs(sls$duration - freq_variation[rownames(freq_variation) ==
"duration", 1]))
# find template close to mean PC1
mean_pc1_indx <- which.min(abs(sls$PC1 - freq_variation[rownames(freq_variation) ==
"PC1", 1]))
# put both templates together
templates <- sls[c(mean_peak_indx, mean_duration_indx, mean_pc1_indx), ]
# label templates
templates$template.type <- c("mean.peak.freq", "mean.duration", "mean.PC1")
# create ext. selection table
templates_est <- selection_table(templates, extended = TRUE, confirm.extended = FALSE,
fix.selec = TRUE)
templates_est <- rename_est_waves(templates_est, new.sound.files = templates$template.type)
catalog(templates_est, nrow = 2, ncol = 2, ovlp = 70, height = 10, width = 14, same.time.scale = TRUE,
mar = 1, wl = 512, gr = FALSE, spec.mar = 0.6, lab.mar = 1, rm.axes = FALSE,
by.row = TRUE, box = TRUE, labels = "sound.files", fast.spec = FALSE, pal = viridis,
flim = c(1, 3.5), img.suffix = "templates", collevels = seq(-100, 0, 5))
te2 <- te <- templates_est[rep(2:4, 2), ]
# te2 <- rename_est_waves(te2, new.sound.files = letters[1:3])
te2$selec <- 2
te3 <- rbind(te, te2)
catalog(templates_est, nrow = 2, ncol = 3, ovlp = 90, height = 10, width = 14, same.time.scale = TRUE,
mar = 1, wl = 512, gr = FALSE, spec.mar = 0.6, lab.mar = 2, rm.axes = FALSE,
by.row = TRUE, box = FALSE, labels = "sound.files", fast.spec = FALSE, pal = viridis,
flim = c(1, 3.5), img.suffix = "templates", collevels = seq(-100, 0, 5))
# # move images to figures folder
move_imgs(from = .Options$warbleR$path, to = "./figures", overwrite = TRUE)
# save est
saveRDS(templates_est, "./data/processed/mean_acoustic_parameter_templates_est.RDS")2.5 Detection perfomance
Using 4 templates:
- mean_peakf: the call with the peak frequency closest to the mean of the population
Code
templates_est <- readRDS("./data/processed/mean_acoustic_parameter_templates_est.RDS")
paral <- 15
# use mean peak freq sels as template, fourier spectrograms
corr_templ <- template_correlator(templates = templates_est, files = unique(sls$sound.files),
path = .Options$warbleR$path, parallel = paral, hop.size = 11.6, ovlp = 70)
saveRDS(corr_templ, "./data/processed/template_correlations_4_templates.RDS")Code
corr_templ <- readRDS("./data/processed/template_correlations_4_templates.RDS")
optimize_fourier_detec <- optimize_template_detector(reference = sls, template.correlations = corr_templ,
threshold = seq(0.05, 0.5, 0.01), cores = 10, by.sound.file = TRUE, pb = TRUE)
# optimize_fourier_detec$recall <- optimize_fourier_detec$sensitivity
# optimize_fourier_detec$precision <- optimize_fourier_detec$specificity
# optimize_fourier_detec$sensitivity <- optimize_fourier_detec$specificity <-
# NULL optimize_fourier_detec$f1.score <- optimize_fourier_detec$recall *
# optimize_fourier_detec$precision
saveRDS(optimize_fourier_detec, "./data/processed/optimization_results_4_templates.RDS")Code
optimize_fourier_detec <- readRDS("./data/processed/optimization_results_4_templates.RDS")
optimize_fourier_detec$templates <- gsub("-1", "", optimize_fourier_detec$templates)
# optimize_fourier_detec$overlap <-
# optimize_fourier_detec$proportional.overlap.true.positives
sd_tab <- summarize_diagnostic(optimize_fourier_detec)
saveRDS(sd_tab, "./data/processed/summary_optimization_results_4_templates.RDS")Code
sd_tab <- readRDS("./data/processed/optimization_results_4_templates.RDS")
sd_tab <- sd_tab[grep("SNR", sd_tab$templates, invert = TRUE), ]
# sd_tab$f1.score <- sd_tab$rec * sd_tab$specificity
agg_sd <- aggregate(cbind(recall, precision, f.score) ~ threshold + templates, data = sd_tab,
mean)
ggplot(agg_sd, aes(x = threshold, y = recall, group = templates, color = templates)) +
geom_line() + geom_point() + scale_color_viridis_d(end = 1) + theme_classic()Code
ggplot(agg_sd, aes(x = threshold, y = precision, group = templates, color = templates)) +
geom_line() + geom_point() + scale_color_viridis_d(end = 0.8) + theme_classic()Code
ggplot(agg_sd, aes(y = precision, x = recall, group = templates, color = templates)) +
geom_line() + geom_point() + scale_color_viridis_d(end = 1) + theme_classic()Code
ggplot(agg_sd, aes(x = threshold, y = f.score, group = templates, color = templates)) +
geom_line() + geom_point() + scale_color_viridis_d(end = 0.8) + theme_classic()Code
stck_agg_sd <- cbind(agg_sd[, 1:2], stack(agg_sd[, 3:5]))
stck_agg_sd$ind <- factor(stck_agg_sd$ind, labels = c("Recall", "Precision", "F1 score"))
# ggf1 <-
ggplot(stck_agg_sd, aes(x = threshold, y = values, group = templates, color = templates)) +
geom_line() + geom_point() + labs(x = "Correlation threshold", color = "Template",
y = "") + facet_wrap(~ind, nrow = 1, scales = "free_y") + scale_color_viridis_d(end = 0.8,
labels = c("Mean duration", "Mean PC1", "Mean peak frequency"), alpha = 0.5) +
theme_classic()Code
# ggsave('./data/processed/multipanel_perfomance_indices.jpeg', width = 9,
# height = 4, dpi = 300)
# print dynamic table
oa_DT <- datatable(sd_tab, editable = list(target = "row"), rownames = FALSE, style = "bootstrap",
filter = "top", options = list(pageLength = 100, autoWidth = TRUE, dom = "ft"),
autoHideNavigation = TRUE, escape = FALSE)
formatRound(table = oa_DT, columns = sapply(sd_tab, is.numeric), 3)Code
# figure with recall and precision
sub_stck_agg_sd <- stck_agg_sd[stck_agg_sd$ind != "F1 score", ]
sub_stck_agg_sd$ind <- paste(sub_stck_agg_sd$ind, "(%)")
sub_stck_agg_sd$ind <- factor(sub_stck_agg_sd$ind, levels = c("Recall (%)", "Precision (%)"))
pc1_0.43thr_dat <- sub_stck_agg_sd[sub_stck_agg_sd$threshold == 0.43 & sub_stck_agg_sd$templates ==
"mean.PC1-1", ]
gg_recall <- ggplot(sub_stck_agg_sd, aes(x = threshold, y = values * 100, group = templates,
color = templates, shape = templates)) + geom_hline(data = pc1_0.43thr_dat, aes(yintercept = values *
100), lty = 2, color = "gray") + geom_vline(xintercept = 0.43, lty = 2, color = "gray") +
geom_line() + geom_point(size = 4) + labs(x = "Correlation threshold", color = "",
y = "", shape = "") + facet_wrap(~ind, nrow = 1, scales = "free_y") + scale_shape_discrete(labels = c("Mean duration",
"Mean PC1", "Mean peak frequency")) + scale_color_viridis_d(end = 0.8, labels = c("Mean duration",
"Mean PC1", "Mean peak frequency"), alpha = 0.5) + theme_classic(base_size = 25) +
theme(legend.position = c(0.76, 0.75), legend.background = element_rect(fill = NA))3 Template fourier mean.PC1 threshold 0.43
Choose ‘fourier mean.PC1’ template with a threshold of 0.43 due to a good sensitivity and relatively low number of false positives
Code
templates_est <- readRDS("./data/processed/mean_acoustic_parameter_templates_est.RDS")
corr_templ <- template_correlator(templates = templates_est[templates_est$sound.files ==
"mean.PC1", , drop = FALSE], files = unique(sls$sound.files), path = .Options$warbleR$path,
parallel = 10, hop.size = 11.6, ovlp = 70)
detec <- template_detector(template.correlations = corr_templ, threshold = 0.43,
parallel = 10, pb = TRUE)
diagnostic <- diagnose_detection(reference = sls, detection = detec, parallel = 10,
by.sound.file = TRUE)
summarize_diagnostic(diagnostic)
saveRDS(diagnostic, "./data/processed/diagnostic_mean_pc1_0.43_threshold_diagnostic.RDS")
lab_detec <- label_detection(reference = sls, detection = detec, parallel = 10)
saveRDS(lab_detec, "./data/processed/label_detection_mean_pc1_0.43_threshold.RDS")Code
lab_detec <- readRDS("./data/processed/label_detection_mean_pc1_0.43_threshold.RDS")
filter_lab_detec <- filter_detection(detection = lab_detec, by = "scores")
saveRDS(filter_lab_detec, "./data/processed/filtered_detection_mean_pc1_0.43_threshold.RDS")
diagnose_model <- diagnose_detection(reference = sls, filter_lab_detec, parallel = 10)
diagnose_model$f1.score <- diagnose_model$specificity * diagnose_model$sensitivity
saveRDS(diagnose_model, "./data/processed/diagnostics_detection_mean_pc1_0.43_threshold.RDS")Code
diag <- readRDS("./data/processed/diagnostics_detection_mean_pc1_0.43_threshold.RDS")
# print dynamic table
oa_DT <- datatable(diag, editable = list(target = "row"), rownames = FALSE, style = "bootstrap",
filter = "top", options = list(pageLength = 100, autoWidth = TRUE, dom = "ft"),
autoHideNavigation = TRUE, escape = FALSE)
formatRound(table = oa_DT, columns = sapply(diag, is.numeric), 3)3.1 Random forest results
Code
filter_lab_detec <- readRDS("./data/processed/filtered_detection_mean_pc1_0.43_threshold.RDS")
# measure spectrographic parameters
spectral_parameters <- spectro_analysis(filter_lab_detec, bp = c(1, 3.5), fast = TRUE,
ovlp = 70, parallel = 10)
# mfccs <- mfcc_stats(X = lab_detec, bp = c(1, 3.5), ovlp = 70, parallel = 10)
# na_rows <- unique(unlist(sapply(mfccs, function(x) which(is.na(x)))))
# lab_detec <- lab_detec[-na_rows, ] spectral_parameters <-
# spectral_parameters[-na_rows, ] mfccs <- mfccs[-na_rows, ]
spectral_parameters$class <- filter_lab_detec$detection.class
# spectral_parameters <- data.frame(spectral_parameters, mfccs[,
# !names(spectral_parameters) %in% c('sound.files', 'selec')])
spectral_parameters$class[spectral_parameters$class != "false.positive"] <- "true.positive"
# make it a factor for ranger to work
spectral_parameters$class <- as.factor(spectral_parameters$class)
# run RF model spectral and cepstral parameters
rfm <- ranger(class ~ ., data = spectral_parameters[, !names(spectral_parameters) %in%
c("sound.files", "selec")], num.trees = 10000, importance = "impurity", seed = 10)
saveRDS(list(rfm = rfm, spectral_parameters = spectral_parameters, filter_lab_detec = filter_lab_detec),
paste0("./data/processed/data_and_model_random_forest_0.43_", "threshold_only_spectro_parameters.RDS"))Code
# attach(readRDS("./data/processed/data_and_model_random_forest_0.43_threshold.RDS"))
attach(readRDS(paste0("./data/processed/data_and_model_random_forest_0.43_",
"threshold_only_spectro_parameters.RDS")))
rfmRanger result
Call:
ranger(class ~ ., data = spectral_parameters[, !names(spectral_parameters) %in% c("sound.files", "selec")], num.trees = 10000, importance = "impurity", seed = 10)
Type: Classification
Number of trees: 10000
Sample size: 89803
Number of independent variables: 26
Mtry: 5
Target node size: 1
Variable importance mode: impurity
Splitrule: gini
OOB prediction error: 0.88 %
Diagnostic after random forest classification:
Code
filter_lab_detec$pred.class <- rfm$predictions
positive_detec <- filter_lab_detec[filter_lab_detec$pred.class == "true.positive",
]
# save for trying stats write.csv(positive_detec,
# './data/processed/true_positive_detections_on_training_recordings.csv',
# row.names = FALSE)
# exp_raven(positive_detec, file.name =
# './data/processed/true_positive_detections_on_training_recordings',
# sound.file.path = .Options$warbleR$path)
# fix path for Fabis laptop fix_path(path = './data/processed/',new.begin.path
# =
# '/Users/fabiolachirino/Dropbox/Fabiola/proyecto_lemur/data/raw/sound_files/10_kHz_5_min_cuts/',
# sound.file.col = 'Begin File')
temp_detec <- positive_detec
temp_detec$detection.class <- "true.positive"
temp_detec$selec <- 1:nrow(temp_detec)
diag <- diagnose_detection(reference = sls, detection = temp_detec, pb = FALSE)
# print dynamic table
oa_DT <- datatable(diag, editable = list(target = "row"), rownames = FALSE, style = "bootstrap",
filter = "top", options = list(pageLength = 100, autoWidth = TRUE, dom = "ft"),
autoHideNavigation = TRUE, escape = FALSE)
formatRound(table = oa_DT, columns = sapply(diag, is.numeric), 3)Black line = 1:1 gray line = model slope
Code
obs_count <- tapply(sls$sound.files, sls$sound.files, length)
pred_count <- tapply(positive_detec$sound.files, positive_detec$sound.files, length)
int_columns <- intersect(names(obs_count), names(pred_count))
obs_count <- obs_count[names(obs_count) %in% int_columns]
pred_count <- pred_count[names(pred_count) %in% int_columns]
pred_count <- pred_count[order(names(pred_count))]
obs_count <- obs_count[order(names(obs_count))]
df <- data.frame(sound.files = names(obs_count), observed = obs_count, predicted = pred_count)
ggplot(df, aes(x = observed, y = predicted)) + geom_point(color = viridis(10, alpha = 0.4)[2],
size = 3) + geom_abline(slope = 1, intercept = 0) + annotate("text", x = 50,
y = 150, label = paste("r =", round(cor(obs_count, pred_count), 3)), size = 8) +
geom_smooth(method = "lm", se = FALSE, col = "gray") + theme_classic(base_size = 20)Code
(lm(pred_count ~ obs_count))
Call:
lm(formula = pred_count ~ obs_count)
Coefficients:
(Intercept) obs_count
-0.970 0.822
Removing 1 outlier
Code
gg_scatter <- ggplot(df[df$observed < 100, ], aes(x = observed, y = predicted)) +
geom_point(color = viridis(10, alpha = 0.4)[5], size = 5) + geom_abline(slope = 1,
intercept = 0) + geom_smooth(method = "lm", se = FALSE, col = "gray") + labs(x = "Observed number of calls",
y = "Predicted\nnumber of calls") + theme_classic(base_size = 30)
gg_scatter
(lm(pred_count[obs_count < 100] ~ obs_count[obs_count < 100]))Code
ggplot(df[df$observed < 100, ], aes(x = observed, y = predicted)) + geom_point(color = alpha("#F2622E",
0.6), size = 3) + geom_abline(slope = 1, intercept = 0) + annotate("text", x = 35,
y = 80, label = paste("r =", round(cor(obs_count[obs_count < 100], pred_count[obs_count <
100]), 3)), size = 8) + geom_smooth(method = "lm", se = FALSE, col = "gray") +
theme_classic(base_size = 20) + labs(x = "Cantos observados", y = "Cantos detectados")Code
(lm(pred_count[obs_count < 100] ~ obs_count[obs_count < 100]))
Call:
lm(formula = pred_count[obs_count < 100] ~ obs_count[obs_count <
100])
Coefficients:
(Intercept) obs_count[obs_count < 100]
0.0145 0.7613
4 Graphs for presentations
Code
library(ggplot2)
library(cowplot)
n <- 90000
sd <- 0.3
set.seed(1)
x <- rnorm(n, mean = 0.1, sd = sd)
set.seed(2)
y <- rnorm(n, mean = 0.9, sd = sd)
umbral <- 0.5
df <- data.frame(tipo = rep(c("Otros\nsonidos", "Cantos\nde lemur"), each = n), vals = c(x,
y))
df$tipo <- factor(df$tipo, levels = c("Otros\nsonidos", "Cantos\nde lemur"))
df$vals <- (df$vals + abs(min(df$vals)))/max(df$vals + abs(min(df$vals)))
fill_values <- c("#AFBF35", "#F2622E")
fill_values <- adjustcolor(fill_values, alpha.f = 0.6)
gg1 <- ggplot(df, aes(x = vals, fill = tipo)) + geom_density() + scale_fill_manual(values = fill_values) +
labs(x = "Umbral de correlación", y = "", fill = "") + geom_vline(xintercept = umbral,
lty = 2) + theme_classic(base_size = 25) + theme(legend.position = c(0.9, 0.9))
agg <- aggregate(vals ~ tipo, df[df$vals > umbral, ], length)
agg$tipo <- factor(agg$tipo, levels = c("Otros\nsonidos", "Cantos\nde lemur"))
gg2 <- ggplot(agg, aes(x = tipo, y = vals, fill = tipo)) + geom_col(col = "black") +
scale_fill_manual(values = fill_values, guide = "none") + theme_classic(base_size = 25) +
labs(x = "", y = "Proporción") + ylim(c(0, n)) + theme(axis.text.y = element_blank())
cowplot::plot_grid(gg1, gg2, nrow = 1, rel_widths = c(1.7, 1))Code
# cowplot::ggsave2(plot = ggcp, filename = './output/threshold_0.3.tiff', width
# = 15, height = 5)5 Graphs for paper
Code
est <- readRDS("./data/processed/extended_selection_table_focal_recordings_lemur.RDS")
est <- signal_2_noise(est, mar = 0.05)
summary(est$SNR)
maxsnr <- est[est$SNR > 17, ]
# dev.off() par(mfrow = c(4, 2)) for(i in seq_len(nrow(maxsnr)))
# warbleR:::spectro_wrblr_int(wave = read_sound_file(maxsnr, index = i, from =
# 0.05, to = Inf), wl = 500, grid = F, collevels = seq(-145, 5, 1), ovlp = 99,
# palette = viridis, scale = FALSE, flab = 'Frecuency (kHz)', tlab = 'TIme
# (s)', main = i, flim = c(0.6, 4.5), zp = 1000)
lemur_call <- read_sound_file(maxsnr, index = 2, from = 0.07, to = 0.17)
# for (i in seq(50, 1000, length.out = 20)){ Sys.sleep(1)
# warbleR:::spectro_wrblr_int(wave = lemur_call, wl = i, grid = F, collevels =
# seq(-125, 0 ,1), ovlp = 90, palette = viridis, scale = FALSE, flab =
# 'Frecuencia (kHz)', tlab = 'Tiempo (s)',flim = c(0.6, 4.5), main = i) }
png(filename = "./output/spectro_300dpi.png", res = 300, width = 11.4, height = 6,
units = "in")
par(mar = c(0, 0, 0, 0))
warbleR:::spectro_wrblr_int(wave = lemur_call, wl = 500, grid = F, collevels = seq(-120,
3, 0.5), ovlp = 99, palette = viridis, scale = FALSE, flab = "Frecuency (kHz)",
tlab = "Time (s)", flim = c(0.6, 4.5), zp = 1000)
dev.off()
# cowplot::plot_grid(gg_recall, gg_scatter, nrow = 2, rel_widths = c(1.7, 1))
# other types of plots not generated with ggplot p6 <- ~{ par(mar = c(1,1,1,1))
# warbleR:::spectro_wrblr_int(wave = lemur_call, wl = 500, grid = F, collevels
# = seq(-120, 3, 0.5), ovlp = 99, palette = viridis, scale = FALSE, flab =
# 'Frecuency (kHz)', tlab = 'Time (s)',flim = c(0.6, 4.5), zp = 1000) } pg1 <-
# cowplot::plot_grid(gg_scatter, p6, nrow = 1, rel_widths = c(1, 0.6))
# prediction removing outlier
prd_100 <- (lm(pred_count[obs_count < 100] ~ obs_count[obs_count < 100]))
gg_scatter <- ggplot(df, aes(x = observed, y = predicted)) + geom_point(color = viridis(10,
alpha = 0.4)[5], size = 4) + geom_abline(slope = 1, intercept = 0) + geom_abline(slope = prd_100$coefficients[2],
intercept = prd_100$coefficients[1], lty = 3, color = "gray", lwd = 2) + geom_smooth(method = "lm",
se = FALSE, col = "gray") + labs(x = "Observed number of calls", y = "Detected\nnumber of calls") +
theme_classic(base_size = 30)
gg_sp <- ggspectro(wave = lemur_call, wl = 500, grid = F, collevels = seq(-120, 3,
0.5), ovlp = 9, palette = viridis, scale = FALSE, flab = "Frecuency (kHz)", tlab = "Time (s)",
flim = c(0.6, 4.5), zp = 1000)
bs <- 15
pg1 <- cowplot::plot_grid(gg_scatter + theme_classic(base_size = bs), gg_sp + theme_classic(base_size = bs),
nrow = 1)
pg2 <- cowplot::plot_grid(gg_recall + theme_classic(base_size = bs) + labs(shape = "Templates:",
color = "Templates:") + theme(legend.position = c(0.7, 0.65), legend.background = element_rect(fill = NA)),
pg1, nrow = 2)
pg2 <- pg2 + draw_plot_label(c("A", "B", "C", "D"), x = c(0.025, 0.51, 0.025, 0.51),
y = c(1, 1, 0.52, 0.52))
pg3 <- ggdraw(pg2) + draw_image("./output/spectro_300dpi.png", x = 0.271, y = 0.1024,
height = 0.38, width = 1, hjust = 0, vjust = 0)
pg3
cowplot::ggsave2(plot = pg3, filename = "./output/fig_recall_precision_scatter_spectro_300dpi.png",
dpi = 300, width = 10, height = 6)
#
6 Takeaways
7 Sum up results
8 Next steps
- Run Moon with previous rain 24h with thinning so all models can be merged
Session information
R version 4.1.2 (2021-11-01)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 22.04.4 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=es_CR.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=es_CR.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=es_CR.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] parallel stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] cowplot_1.1.3 ranger_0.16.0 formatR_1.14 DT_0.32
[5] ohun_1.0.2 kableExtra_1.4.0 ggplot2_3.5.1 viridis_0.6.5
[9] viridisLite_0.4.2 Rraven_1.0.13 pbapply_1.7-2 bioacoustics_0.2.8
[13] warbleR_1.1.30 NatureSounds_1.0.4 knitr_1.46 seewave_2.2.3
[17] tuneR_1.4.6 devtools_2.4.5 usethis_2.2.3
loaded via a namespace (and not attached):
[1] colorspace_2.1-0 rjson_0.2.21 ellipsis_0.3.2
[4] class_7.3-20 fs_1.6.4 xaringanExtra_0.7.0
[7] rstudioapi_0.15.0 proxy_0.4-27 farver_2.1.2
[10] remotes_2.5.0 fansi_1.0.6 xml2_1.3.6
[13] splines_4.1.2 cachem_1.0.8 pkgload_1.3.4
[16] jsonlite_1.8.8 packrat_0.9.2 shiny_1.8.0
[19] compiler_4.1.2 backports_1.4.1 Matrix_1.6-5
[22] fastmap_1.1.1 cli_3.6.2 later_1.3.2
[25] htmltools_0.5.8.1 tools_4.1.2 igraph_2.0.3
[28] gtable_0.3.5 glue_1.7.0 dplyr_1.1.4
[31] Rcpp_1.0.12 jquerylib_0.1.4 vctrs_0.6.5
[34] svglite_2.1.3 nlme_3.1-155 crosstalk_1.2.1
[37] xfun_0.43 stringr_1.5.1 brio_1.1.4
[40] testthat_3.2.1 mime_0.12 miniUI_0.1.1.1
[43] lifecycle_1.0.4 MASS_7.3-55 scales_1.3.0
[46] promises_1.2.1 yaml_2.3.8 memoise_2.0.1
[49] gridExtra_2.3 stringi_1.8.4 highr_0.10
[52] e1071_1.7-14 checkmate_2.3.1 pkgbuild_1.4.4
[55] rlang_1.1.4 pkgconfig_2.0.3 systemfonts_1.0.6
[58] dtw_1.23-1 moments_0.14.1 bitops_1.0-7
[61] evaluate_0.23 lattice_0.20-45 purrr_1.0.2
[64] sf_1.0-6 htmlwidgets_1.6.4 labeling_0.4.3
[67] tidyselect_1.2.1 magrittr_2.0.3 R6_2.5.1
[70] fftw_1.0-8 generics_0.1.3 profvis_0.3.8
[73] DBI_1.2.2 pillar_1.9.0 withr_3.0.0
[76] mgcv_1.8-39 units_0.8-5 RCurl_1.98-1.14
[79] tibble_3.2.1 crayon_1.5.2 KernSmooth_2.23-20
[82] utf8_1.2.4 rmarkdown_2.26 urlchecker_1.0.1
[85] grid_4.1.2 sketchy_1.0.3 digest_0.6.35
[88] classInt_0.4-10 xtable_1.8-4 httpuv_1.6.14
[91] signal_1.8-0 munsell_0.5.1 sessioninfo_1.2.2